GLS and Spatial Autocorrelation model fitting

Read the data from Juan David

I'm going to include a link to the datasource


In [60]:
# Load Biospytial modules and etc.
%matplotlib inline
import sys
sys.path.append('/apps')
sys.path.append('..')
sys.path.append('../../spystats')
import django
django.setup()
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
## Use the ggplot style
plt.style.use('ggplot')

import tools
import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [61]:
import os
input_path = "/RawDataCSV/DN_plot.data/"

In [62]:
filenames = os.listdir(input_path)
fullpath = map(lambda name : input_path+name,filenames)
pds = map(lambda file : pd.read_csv(file), fullpath)

In [63]:
sizes = map(lambda p : p.shape[0],pds)

In [64]:
sizes


Out[64]:
[100000,
 200000,
 300000,
 150000,
 300000,
 450000,
 200000,
 400000,
 600000,
 50000,
 100000,
 150000,
 100000,
 200000,
 300000,
 150000,
 300000,
 450000,
 200000,
 400000,
 600000,
 50000,
 100000,
 150000]

In [65]:
t1 = fullpath[0]
nd = pd.read_csv(t1)

In [66]:
## We group by simulation index

In [67]:
groups = nd.groupby(['sim'])

In [68]:
##  PCOnvert to list to extract appropiate sub fitting, this will be expanded further

In [69]:
gg = list(groups)

Selecting one group


In [70]:
t1 = gg[0][1]

In [71]:
## Convert to geopandas
gt1 =  tools.toGeoDataFrame(t1,xcoord_name='lon',ycoord_name='lat')

In [72]:
## We need to reproject to Alberts or What ever

In [73]:
def prepareDataFrame(dataframe):
    """
    Reads the data and stores it in a geodataframe.
    """
    new_data = tools.toGeoDataFrame(dataframe,xcoord_name='lon',ycoord_name='lat')
    logger.info("Reprojecting to Alberts equal area")
    new_data =  new_data.to_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")
    new_data['logBiomass'] = new_data.apply(lambda x : np.log(x.plotBiomass),axis=1)
    new_data['newLon'] = new_data.apply(lambda c : c.geometry.x, axis=1)
    new_data['newLat'] = new_data.apply(lambda c : c.geometry.y, axis=1)
    new_data['logBiomass'] = np.log(new_data.plotBiomass)
    new_data['logSppN'] = np.log(new_data.SppN)
    logger.info("Removing possible duplicates. \n This avoids problems of Non Positive semidefinite")
    new_data = new_data.drop_duplicates(subset=['newLon','newLat'])
    return new_data

In [77]:
ngt1 = prepareDataFrame(t1)
sill = 0.340246718396
range_ = 41188.0234423 # (meters)
nugget = 0.329937603763
alpha = 1.12143687914 
wm = tools.WhittleVariogram(nugget=nugget,sill=sill,range_a=range_,alpha=alpha)
vg1 = tools.Variogram(ngt1,'logSppN',model=wm)
vg1.distance_threshold = 100000
vg1.fitVariogramModel(wm)


ERROR:tools:This model  does not support more than 3 parameters

TypeErrorTraceback (most recent call last)
<ipython-input-77-6d440917951d> in <module>()
      7 vg1 = tools.Variogram(ngt1,'logSppN',model=wm)
      8 vg1.distance_threshold = 100000
----> 9 vg1.fitVariogramModel(wm)

/apps/external_plugins/spystats/spystats/tools.pyc in fitVariogramModel(self, model_instance, parameter_set)
    346             parameter_dict = model_instance.fit(self,parameter_set)
    347         else:
--> 348             parameter_dict = model_instance.fit(self)
    349 
    350 

/apps/external_plugins/spystats/spystats/tools.pyc in fit(self, emp_variogram, init_params)
    728 
    729         try:
--> 730             best_params, covar_model = curve_fit(self.model, xdata=lags, ydata=variogram, p0=init_params)
    731         except TypeError:
    732             logger.error("This model  does not support more than 3 parameters")

/opt/conda/envs/biospytial/lib/python2.7/site-packages/scipy/optimize/minpack.pyc in curve_fit(f, xdata, ydata, p0, sigma, absolute_sigma, check_finite, bounds, method, **kwargs)
    649         # Remove full_output from kwargs, otherwise we're passing it in twice.
    650         return_full = kwargs.pop('full_output', False)
--> 651         res = leastsq(func, p0, args=args, full_output=1, **kwargs)
    652         popt, pcov, infodict, errmsg, ier = res
    653         cost = np.sum(infodict['fvec'] ** 2)

/opt/conda/envs/biospytial/lib/python2.7/site-packages/scipy/optimize/minpack.pyc in leastsq(func, x0, args, Dfun, full_output, col_deriv, ftol, xtol, gtol, maxfev, epsfcn, factor, diag)
    378     m = shape[0]
    379     if n > m:
--> 380         raise TypeError('Improper input: N=%s must not exceed M=%s' % (n, m))
    381     if epsfcn is None:
    382         epsfcn = finfo(dtype).eps

TypeError: Improper input: N=4 must not exceed M=0

In [80]:
vg1.plot(with_envelope=True,n_bins=100)
vg1.plot??


Let's use as initial guess the same parameters than the one obtained in the global variogram.

This is a first pragmatic decision there are other ways to achieve this, like the likelihood approach.

To keep consistency with the global model (based on moment-based estimation) .


In [79]:
vg1.fitVariogramModel(wm)


Out[79]:
< Whittle Variogram : sill 0.253579965123, range 14584.5091955, nugget 0.152226938744, alpha1.75048601477 >

In [40]:
wm


Out[40]:
< Whittle Variogram : sill 0.253580823739, range 14587.3424731, nugget 0.152375897495, alpha1.75090498308 >

In [41]:
x = np.linspace(0,100000,10000)
plt.plot(x,vg1.model.f(x))
vg1.plot(with_envelope=True,n_bins=100)



In [42]:
import statsutils as st

In [ ]:
def prepareDataFrame(dataframe):
    """
    Reads the data and stores it in a geodataframe.
    """
    new_data = tools.toGeoDataFrame(dataframe,xcoord_name='lon',ycoord_name='lat')
    logger.info("Reprojecting to Alberts equal area")
    new_data =  new_data.to_crs("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs ")
    new_data['logBiomass'] = new_data.apply(lambda x : np.log(x.plotBiomass),axis=1)
    new_data['newLon'] = new_data.apply(lambda c : c.geometry.x, axis=1)
    new_data['newLat'] = new_data.apply(lambda c : c.geometry.y, axis=1)
    new_data['logBiomass'] = np.log(new_data.plotBiomass)
    new_data['logSppN'] = np.log(new_data.SppN)
    logger.info("Removing possible duplicates. \n This avoids problems of Non Positive semidefinite")
    new_data = new_data.drop_duplicates(subset=['newLon','newLat'])
    return new_data

In [81]:
def prepareAndFitModel(dataframe,n_bins=100):
    """
    Hard coded function for fitting the model.
    """
    geodataframe = prepareDataFrame(dataframe)
    sill = 0.340246718396
    range_ = 41188.0234423 # (meters)
    nugget = 0.329937603763
    alpha = 1.12143687914 
    wm = tools.WhittleVariogram(nugget=nugget,sill=sill,range_a=range_)
    vg = tools.Variogram(geodataframe,'logSppN',model=wm)
    vg.distance_threshold = 100000
    vg.calculateEmpirical(n_bins=n_bins)
    vg.fitVariogramModel(wm)
    model,results = st.fitLinearLogLogModel(geodataframe)
    geodataframe['residuals'] = results.resid
    fitted_values = st.fitGLSRobust(geodataframe,vg,distance_threshold=100000)
    return fitted_values
    #return vg

In [87]:
resum,variogram,resultspd,results = prepareAndFitModel(t1)


INFO:statsutils:Fitting OLS linear model: logBiomass ~ logSppN 
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc4289d50>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.741597826409, {{"Intercept":8.1381818069,"logSppN":0.5877675145},{"Intercept":1.873627897e-73,"logSppN":0.0000000609},{"0":{"Intercept":7.8297464107,"logSppN":0.3888848089},"1":{"Intercept":8.4466172031,"logSppN":0.78665022}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3945.62545892, sill 0.499275129871, nugget 0.581023415479
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc429b650>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813765995021, {{"Intercept":8.1420716092,"logSppN":0.5963900683},{"Intercept":1.886664966e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.31213263,"logSppN":0.4076110011},"1":{"Intercept":8.9720105884,"logSppN":0.7851691355}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc414e210>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc429b5d0>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cd02efa10>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cd02e3150>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cdc0cbdd0>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc4166850>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cd00f1690>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1d371f23d0>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cd00bce10>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cdc0dae50>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cdc0dabd0>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc451efd0>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc450d510>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cd7e03210>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cd7e22c10>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc4531090>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cd7e03d90>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016
INFO:statsutils:Building Spatial Covariance Matrix
INFO:statsutils:Calculating Distance Matrix
INFO:statsutils:Calculating GLS estimators
INFO:statsutils:Fitting linear model using GLS
WARNING:statsutils:results <statsmodels.regression.linear_model.RegressionResultsWrapper object at 0x7f1cc4522b10>
INFO:statsutils:RESULTS::: n_obs: 100.0, r-squared: 0.813968494056, {{"Intercept":8.1420890354,"logSppN":0.5963746923},{"Intercept":1.753937222e-35,"logSppN":0.0000000098},{"0":{"Intercept":7.3129232854,"logSppN":0.4075960968},"1":{"Intercept":8.9712547854,"logSppN":0.7851532878}}}
INFO:statsutils:Recalculating variogram
INFO:statsutils:Refiting Theoretical Variogram
INFO:statsutils:Variogram parameters: range 3956.77998912, sill 0.499274515678, nugget 0.581193609016

In [94]:
## We will apply this function to all the data in the grpds 

cosas = map(lambda pd : prepareAndFitModel(pd),pds)



MemoryErrorTraceback (most recent call last)
<ipython-input-94-a66f5b850a3c> in <module>()
      1 ## We will apply this function to all the data in the grpds
      2 
----> 3 cosas = map(lambda pd : prepareAndFitModel(pd),pds)

<ipython-input-94-a66f5b850a3c> in <lambda>(pd)
      1 ## We will apply this function to all the data in the grpds
      2 
----> 3 cosas = map(lambda pd : prepareAndFitModel(pd),pds)

<ipython-input-81-4187c403e542> in prepareAndFitModel(dataframe, n_bins)
     11     vg = tools.Variogram(geodataframe,'logSppN',model=wm)
     12     vg.distance_threshold = 100000
---> 13     vg.calculateEmpirical(n_bins=n_bins)
     14     vg.fitVariogramModel(wm)
     15     model,results = st.fitLinearLogLogModel(geodataframe)

/apps/external_plugins/spystats/spystats/tools.pyc in calculateEmpirical(self, n_bins)
    236         """
    237 
--> 238         distances = self.distance_coordinates.flatten()
    239         y = self.distance_responses.flatten()
    240         results = calculateEmpiricalVariogram(distances,y,n_bins=n_bins,distance_threshold=self.distance_threshold)

/apps/external_plugins/spystats/spystats/tools.pyc in distance_coordinates(self)
    218     @property
    219     def distance_coordinates(self):
--> 220         return _getDistanceMatrix(self.data)
    221     @property
    222     def distance_responses(self):

/apps/external_plugins/spystats/spystats/tools.pyc in _getDistanceMatrix(geopandas_dataset)
    108     data = geopandas_dataset
    109     coords = zip(data.centroid.x,data.centroid.y)
--> 110     dM = sp.distance_matrix(coords,coords,p=2.0)
    111     return dM
    112 

/opt/conda/envs/biospytial/lib/python2.7/site-packages/scipy/spatial/kdtree.pyc in distance_matrix(x, y, p, threshold)
    973         return minkowski_distance(x[:,np.newaxis,:],y[np.newaxis,:,:],p)
    974     else:
--> 975         result = np.empty((m,n),dtype=float)  # FIXME: figure out the best dtype
    976         if m < n:
    977             for i in range(m):

MemoryError: 

In [ ]:


In [89]:
x = np.linspace(0,100000,10000)
plt.plot(x,variogram.model.f(x))
variogram.plot(with_envelope=True,n_bins=100)



In [91]:
nd.groupby?

In [ ]:
ngt1['logBiomass','logSppN','lon','lat','newLon','newLat']